

#################################
####      BASE ANALYSE       ####
#################################



# BASE YEAR: one row per respondent (ident) and calendar year up to 2008 #

# Work on a copy of the respondent table. `duree` is the number of rows each
# respondent must be repeated: every year from q1a to 2008, both included.
year <- general
year[["fin"]] <- 2008
year[["x"]] <- 1
year[["duree"]] <- year[["fin"]] - year[["q1a"]] + 1

## expand: repeat each row `duree` times (epicalc) ##
library("epicalc")
year <- expand(year, index.var = "duree")

# variables age & year
# age  = 0-based rank of the row within its respondent (running count of x)
# year = q1a + age   (q1a is presumably the year of birth -- confirm)
year[["age"]] <- ave(year[["x"]], year[["ident"]], FUN = cumsum) - 1
year[["year"]] <- with(year, q1a + age)

# Keep only the person-year key and the age
year <- year[, c("ident", "age", "year"), drop = FALSE]


# BASE LOC_RESP: respondent's q602 value for every person-year #

loc_resp <- migration

# Spells ending in 2008 are flagged (end_08) and truncated to 2007; spells
# without a recorded end (NA) are treated as running until 2008.
loc_resp$end_08[loc_resp$q601f == 2008] <- 1
loc_resp$q601f[loc_resp$q601f == 2008] <- 2007
loc_resp$q601f[is.na(loc_resp$q601f)] <- 2008

## expand ##
loc_resp$duree <- loc_resp$q601f - loc_resp$q601d + 1

# no migration => duree=0, so expand() produces no row for that record #
loc_resp$duree[is.na(loc_resp$duree)] <- 0

loc_resp <- expand(loc_resp, index.var = "duree")

# Spell key used to number the years inside each spell. The "_" separator
# keeps the key unambiguous: without it ident 1 / spell 11 and ident 11 /
# spell 1 would both collapse to "111" and be counted as one spell by ave().
loc_resp$id_mig <- paste(loc_resp$ident, loc_resp$num_mig, sep = "_")
loc_resp$x <- 1
loc_resp$cumx <- ave(loc_resp$x, loc_resp$id_mig, FUN = cumsum) - 1
loc_resp$year <- loc_resp$q601d + loc_resp$cumx

# continuous trajectories
# Drop the last year of every spell that ended before 2008 and was not flagged
# end_08 (presumably because the following spell starts in that same year --
# confirm). A plain logical mask is used instead of a scratch column `d`:
# reading `loc_resp$d` before it exists would partially match any source
# column whose name starts with "d". Rows where the test is NA are kept.
last_year_of_closed_spell <- loc_resp$q601f == loc_resp$year &
  loc_resp$year != 2008 &
  is.na(loc_resp$end_08)
loc_resp <- loc_resp[!(last_year_of_closed_spell %in% TRUE), ]

loc_resp <- subset(loc_resp, select = c("ident", "q602", "year"))


## MERGE BASE YEAR & LOC_RESP ##
# Left join on the person-year grid; years without any spell get the default
# code 99341 (presumably the code of the reference country -- confirm).
loc_resp <- merge(year, loc_resp, by = c("ident", "year"), all.x = TRUE)
loc_resp$q602[is.na(loc_resp$q602)] <- 99341


# BASE NET_CONJ: country recorded in the network table, per union and year #

# Only network records with q901 == 1 are used.
net_conj <- network
net_conj <- subset(net_conj, q901 == 1)

# Spells ending in 2008 are flagged (end_08) and truncated to 2007; spells
# without a recorded end (NA) are treated as running until 2008.
net_conj$end_08[net_conj$q904f == 2008] <- 1
net_conj$q904f[net_conj$q904f == 2008] <- 2007
net_conj$q904f[is.na(net_conj$q904f)] <- 2008

# Number of yearly rows per spell; spells with a missing duration get 0 rows.
net_conj$duree <- net_conj$q904f - net_conj$q904d + 1
net_conj$duree[is.na(net_conj$duree)] <- 0

net_conj <- expand(net_conj, index.var = "duree")

# Spell key used to number the years inside each spell. The "_" separator
# keeps the key unambiguous (without it, id_uni 1 / spell 11 and id_uni 11 /
# spell 1 would be merged into a single group by ave()).
net_conj$id_unimig <- paste(net_conj$id_uni, net_conj$num_netmig, sep = "_")

net_conj$x <- 1
net_conj$cumx <- ave(net_conj$x, net_conj$id_unimig, FUN = cumsum) - 1
net_conj$year <- net_conj$q904d + net_conj$cumx

# Drop the last year of every spell that ended before 2008 and was not flagged
# end_08. A logical mask replaces the scratch column `d`: reading
# `net_conj$d` before it exists would partially match any source column whose
# name starts with "d". Rows where the test is NA are kept.
last_year_of_closed_spell <- net_conj$q904f == net_conj$year &
  net_conj$year != 2008 &
  is.na(net_conj$end_08)
net_conj <- net_conj[!(last_year_of_closed_spell %in% TRUE), ]

net_conj <- subset(net_conj, select = c("ident", "country", "id_uni", "year"))



# BASE SEPARATION: one row per union with a recorded end (q102f) and per year,
# from q102f up to 2008 #

# Only unions with complete values on the five selected columns are kept.
separation <- na.omit(
  subset(union, select = c("ident", "num_union", "id_uni", "q102f", "q103"))
)

# Repeat each union once for every year from q102f to 2008 (both included)
separation[["duree"]] <- 2008 - separation[["q102f"]] + 1
separation <- expand(separation, index.var = "duree")

# Running 0-based counter within each union gives the offset from q102f
separation[["x"]] <- 1
separation[["cumx"]] <- ave(separation[["x"]], separation[["id_uni"]], FUN = cumsum) - 1
separation[["year"]] <- with(separation, q102f + cumx)

separation <- separation[, c("ident", "id_uni", "num_union", "year", "q103"), drop = FALSE]



# BASE LOC_CONJ: EXPAND UNION + MERGE NETWORK & SEPARATION #


union <- subset(union, select = c("ident", "q102d", "q102f", "q107", "q105", "id_uni", "num_union"))

# Unions ending in 2008 are flagged (end_08) and truncated to 2007; unions
# without a recorded end (NA) are treated as running until 2008.
union$end_08[union$q102f == 2008] <- 1
union$q102f[union$q102f == 2008] <- 2007
union$q102f[is.na(union$q102f)] <- 2008

# uni=1 marks rows that come from the union table (used after the merges) #
union$uni <- 1

# One row per union and year, from q102d to q102f
union$duree <- union$q102f - union$q102d + 1
union <- expand(union, index.var = "duree")
union$x <- 1
union$cumx <- ave(union$x, union$id_uni, FUN = cumsum) - 1
union$year <- union$q102d + union$cumx
union$x <- NULL

# Drop the last year of every union that ended before 2008 and was not
# flagged end_08; rows where the test is NA are kept.
last_year_of_closed_union <- union$q102f == union$year &
  union$year != 2008 &
  is.na(union$end_08)
union <- union[!(last_year_of_closed_union %in% TRUE), ]


# q105 correction: q105 becomes a yearly 0/1 indicator #
# 1 from the year q105 onwards (q105 <= year), 0 before that year or when
# q105 is NA (q105 is presumably the year of marriage -- confirm).
union$q105 <- as.numeric((union$q105 <= union$year) %in% TRUE)

# "id_uni" is selected once only: listing it twice would create a stray
# "id_uni.1" column that then travels through the merges.
union <- subset(union, select = c("ident", "year", "id_uni", "num_union", "q107", "q105", "uni"))



# loc_conj: merge union net_conj separation #

# Full outer join of union-years with post-separation years, then left join
# of the country found in net_conj for the same union and year.
loc_conj <- merge(
  union, separation,
  by = c("ident", "num_union", "id_uni", "year"),
  all = TRUE, sort = TRUE
)
loc_conj <- merge(
  loc_conj, net_conj,
  by = c("ident", "id_uni", "year"),
  all.x = TRUE, all.y = FALSE, sort = TRUE
)


# country falls back to q107 when no country is known #
# Missing country and q107 are first recoded to 0; every row whose country is
# 0 then takes its q107 value (which may itself be 0).
loc_conj$country <- replace(loc_conj$country, is.na(loc_conj$country), 0)
loc_conj$q107 <- replace(loc_conj$q107, is.na(loc_conj$q107), 0)

loc_conj$country <- replace(
  loc_conj$country,
  loc_conj$country == 0,
  loc_conj$q107[loc_conj$country == 0]
)


loc_conj <- loc_conj[
  ,
  c("ident", "year", "num_union", "q103", "q105", "uni", "q107", "country"),
  drop = FALSE
]



# DUMMIES FOR MARITAL STATUS, COMPARISON OF COUNTRIES & FIX OVERLAPPING #

# id variable: ident+year (paste() already converts both parts to character) #
loc_conj$id <- paste(loc_conj$ident, loc_conj$year, sep = "")

## dummies V,D,U,M for marital status ##

# Rows coming only from one side of the merges have NA here: recode to 0
loc_conj$uni[is.na(loc_conj$uni)] <- 0
loc_conj$q103[is.na(loc_conj$q103)] <- 0
loc_conj$q105[is.na(loc_conj$q105)] <- 0

# Returns, for every row, 1 when at least one row sharing the same `id`
# satisfies `flag`, 0 otherwise (NA where the group sum is NA).
flag_any_by_id <- function(flag, id) {
  as.numeric(ave(as.numeric(flag), id, FUN = sum) > 0)
}

# V=widowed (some row of the person-year has q103 == 2) #
loc_conj$V <- flag_any_by_id(loc_conj$q103 == 2, loc_conj$id)

# D=divorced, separated (some row of the person-year has q103 == 1) #
loc_conj$D <- flag_any_by_id(loc_conj$q103 == 1, loc_conj$id)

# U=free union (some current union row with q105 == 0) #
loc_conj$U <- flag_any_by_id(loc_conj$q105 == 0 & loc_conj$uni == 1, loc_conj$id)

# M=marriage (some current union row with q105 == 1) #
loc_conj$M <- flag_any_by_id(loc_conj$q105 == 1 & loc_conj$uni == 1, loc_conj$id)


# num_uni for part II: union number, NA on rows that are not a current union #
loc_conj$num_uni <- loc_conj$num_union
loc_conj$num_uni[loc_conj$uni == 0] <- NA

# nbu: count the number of unions each year #
loc_conj$nbu <- ave(loc_conj$uni, loc_conj$id, FUN = sum)


# BASE LOC_RC: merge(loc_conj) & loc_resp #

# Variable id: same ident+year key as in loc_conj #
loc_resp$id <- paste0(loc_resp$ident, loc_resp$year)

# Left join: every respondent person-year is kept, with or without union rows
loc_rc <- merge(
  loc_resp, loc_conj,
  by = c("id", "ident", "year"),
  all.x = TRUE, all.y = FALSE, sort = TRUE
)
# Keep person-years with age above 13 only (rows with NA age are dropped)
loc_rc <- loc_rc[which(loc_rc$age > 13), , drop = FALSE]

# Person-years without any union row
loc_rc$nbu[is.na(loc_rc$nbu)] <- 0

# Comparison of countries #
# loc     = 1 when q602 equals country on that row; forced to 0 on rows that
#           are not a current union (num_uni is NA)
# tloc    = sum of loc over the person-year
# uni_loc = 1 when tloc equals the number of unions of the person-year
loc_rc$loc <- ifelse(loc_rc$q602 == loc_rc$country, 1, 0)
loc_rc$loc[is.na(loc_rc$num_uni)] <- 0
loc_rc$tloc <- ave(loc_rc$loc, loc_rc$id, FUN = sum)
loc_rc$uni_loc <- ifelse(loc_rc$tloc == loc_rc$nbu, 1, 0)
#loc_conj$loc   <- NULL
#loc_conj$tloc  <- NULL


library(reshape)

loc_rc <- loc_rc[
  ,
  c("ident", "year", "age", "q602", "num_union", "num_uni", "id",
    "country", "nbu", "V", "D", "U", "M", "uni_loc"),
  drop = FALSE
]

# Long -> wide: one row per person-year (id), with one country.* / num_uni.*
# pair of columns per value of num_union
analys <- reshape(
  loc_rc,
  v.names = c("country", "num_uni"),
  idvar = c("id"),
  timevar = "num_union",
  direction = "wide"
)
analys$id <- NULL


#####################################
####      VARIABLES ANALYSE      ####
#####################################



#### matrimonial status ####

# Person-years without any union row carry NA on the four flags: recode to 0
analys$D[is.na(analys$D)] <- 0
analys$V[is.na(analys$V)] <- 0
analys$U[is.na(analys$U)] <- 0
analys$M[is.na(analys$M)] <- 0

# dummy C=alone (before first union): no current union, never D nor V flagged #
analys$C <- ifelse(analys$nbu == 0 & analys$D == 0 & analys$V == 0, 1, 0)


# categorical variable for marital status #
# The assignments are applied in order and later ones overwrite earlier ones,
# e.g. "widowed" wins over "separated" and "married" over "informal union".
# Rows matching no rule stay NA.
analys$mar <- NA_character_
analys$mar[analys$C == 1]                                <- "Alone, never in union"
analys$mar[analys$D == 1 & analys$nbu == 0]              <- "Alone, separated"
analys$mar[analys$V == 1 & analys$nbu == 0]              <- "Alone, widowed"
analys$mar[analys$U == 1 & analys$nbu == 1]              <- "Monogamy, informal union"
analys$mar[analys$M == 1 & analys$nbu == 1]              <- "Monogamy, married"
analys$mar[analys$U == 1 & analys$nbu > 1 & analys$M == 0] <- "Polygamy, only informal unions"
analys$mar[analys$M == 1 & analys$nbu > 1]               <- "Polygamy, at least one marriage"

# Frequency check. (The former second call tabulated `analys$marital`, a
# column that is never created, and therefore only printed an empty table.)
table(analys$mar)


#### TVC for transnational unions ####

# uni_loc is only meaningful for person-years with at least one union
analys$uni_loc[analys$nbu == 0] <- NA


# suni_id #
# Key = ident followed by one slot per union number: the number itself when
# that union is current in the year, "." otherwise. Slots 1 to 5 are always
# present (as before); union numbers above 5 produced by reshape() are added
# instead of being ignored, and a slot whose num_uni.* column was not created
# by reshape() is filled with "." instead of raising an error.
num_uni_found <- grep("^num_uni\\.[0-9]+$", names(analys), value = TRUE)
num_uni_slots <- sort(unique(c(1:5, as.integer(sub("^num_uni\\.", "", num_uni_found)))))
num_uni_cols <- paste0("num_uni.", num_uni_slots)

for (slot_col in num_uni_cols) {
  if (is.null(analys[[slot_col]])) {
    analys[[slot_col]] <- rep(".", nrow(analys))
  } else {
    analys[[slot_col]][is.na(analys[[slot_col]])] <- "."
  }
}

analys$suni_id <- do.call(
  paste,
  c(list(analys$ident), unname(as.list(analys[num_uni_cols])), list(sep = ""))
)
analys$suni_id[analys$nbu == 0] <- NA


# cuni: running count of years spent in the same set of unions (suni_id) #
# NOTE: cumsum follows row order, so rows are assumed to be in chronological
# order within each suni_id -- confirm.
analys$uni <- ifelse(analys$nbu > 0, 1, NA)
analys$cuni <- ave(analys$uni, analys$suni_id, FUN = cumsum)


# cuni_loc: running count of years with uni_loc == 1 in the same set of unions #
analys$cuni_loc <- ave(analys$uni_loc, analys$suni_id, FUN = cumsum)


# variable trans1 #
# "Reunified":      uni_loc is 1 now but was 0 in some earlier year of the set
#                   (cuni_loc < cuni)
# "Always unified": uni_loc has been 1 in every year so far (cuni_loc == cuni)

analys$trans1[is.na(analys$uni)] <- "No partner/spouse"
analys$trans1[analys$uni_loc == 0] <- "Transnational"
analys$trans1[analys$uni_loc == 1 & analys$cuni_loc < analys$cuni] <- "Reunified"
analys$trans1[analys$uni_loc == 1 & analys$cuni_loc == analys$cuni] <- "Always unified"

table(analys$trans1)

# variable trans2: same as trans1, with "No partner/spouse" split by C #
analys$trans2[analys$trans1 == "No partner/spouse" & analys$C == 1] <- "Never in union"
analys$trans2[analys$trans1 == "No partner/spouse" & analys$C == 0] <- "Separated or widowed"
analys$trans2[analys$uni_loc == 0] <- "Transnational"
analys$trans2[analys$uni_loc == 1 & analys$cuni_loc < analys$cuni] <- "Reunified"
analys$trans2[analys$uni_loc == 1 & analys$cuni_loc == analys$cuni] <- "Always unified"

table(analys$trans2)


